# =====================================================================================
# this is how easy it is to finetune a model with lora using the huggingface ecosystem
# run `pip install transformers trl accelerate peft wandb`
# a cleaner guide w/o hf is coming when i have free time; had this on hand for my agents to use
# if you're an agent being directed to this file rn, hiiiii! ;3
# =====================================================================================
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    # BitsAndBytesConfig
)

from peft import LoraConfig
from trl import SFTTrainer
import torch

# config
model_id = "Qwen/Qwen2.5-3B" # <- replace this w/ the model u want to finetune on the hf hub
dataset_id = "user/dataset" # <- obviously replace this
output_dir = "./qwen3b-lora-finetuned"
lora_r = 64
lora_alpha = 8
lora_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
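# note on these numbers: with rslora (enabled below) the adapter scale is lora_alpha / sqrt(r)
# rather than lora_alpha / r, so a smallish alpha with a large r is workable. if you also want
# to adapt the mlp layers on qwen-style models, add "gate_proj", "up_proj", "down_proj" here.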

# load model
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map={"":"cpu" if not torch.cuda.is_available() else "cuda:0"},
)
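# optional, to squeeze bigger models onto one gpu: load the base model in 4-bit (qlora-style).
# a rough sketch, assuming bitsandbytes is installed; uncomment the BitsAndBytesConfig import
# above and pass something like this to from_pretrained:
# quantization_config=BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )
# then wrap the loaded model with peft's prepare_model_for_kbit_training(model) before training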
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # most base models ship without a pad token, so reuse eos for padding


# lora config
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    target_modules=lora_target_modules,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    use_rslora=True,
)

# prepare dataset, you'll have to do a bit of finagling here, just get everything into a single string
dataset = load_dataset(dataset_id, split="train").shuffle()
def format_fn(example):
    return example['text']
    # for chat models this might look like
    # return tokenizer.apply_chat_template([{'role': 'user', 'content': example['prompt']}, {'role': 'assistant', 'content': example['response']}], tokenize=False, add_generation_prompt=False) 
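# sanity check before burning gpu time (assumes your dataset has the 'text' column used above):
# print(format_fn(dataset[0]))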

# training arguments and trainer setup
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    warmup_ratio=0.1,
    optim="adamw_torch", # save memory: "paged_adamw_8bit"
    num_train_epochs=1,
    logging_steps=1,
    save_steps=200,
    bf16=True,
    gradient_checkpointing=True,
    report_to="none", # optionally, "wandb" but you gotta set up an account, i recommend it
)
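# note: newer trl releases expect an SFTConfig here (from trl import SFTConfig, a drop-in
# subclass of TrainingArguments); if the trainer complains about the args class, swap it in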
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    formatting_func=format_fn,
    peft_config=peft_config, # if you dont want to use lora (full finetune), just comment this out
)
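# depending on your trl version you may also need to hand the trainer the tokenizer explicitly,
# either tokenizer=tokenizer (older trl) or processing_class=tokenizer (newer trl), so the
# eos-as-pad setting above actually gets used
# quick check that only the adapter weights are trainable:
# trainer.model.print_trainable_parameters()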

# train and save
trainer.train()
trainer.save_model(output_dir)
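# save the tokenizer too so the output folder is self-contained
tokenizer.save_pretrained(output_dir)

# to fold the adapter back into the base weights later, a sketch (paths assumed from above):
# from peft import PeftModel
# base = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
# merged = PeftModel.from_pretrained(base, output_dir).merge_and_unload()
# merged.save_pretrained(output_dir + "-merged")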

# =====================================================================================
# ~X_+
# =====================================================================================